Crime Report Heatmap

Map

Los Angeles Crime Report Heatmap (2010 - 2019):

Code

# Crime Data from 2010 to Present
# From https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-Present/y8tr-7khq/data

#### Heatmap Attempt #### 

crime <- read.csv("/Users/charliecarter/Downloads/Crime_Data_from_2010_to_Present.csv",
                  stringsAsFactors = F)

## Parallel Computing To Derive Latitude and Longitude Quickly ####
library(parallel)
no_cores <- detectCores() - 1
cl <- makeCluster(no_cores)

## get lat
crime$lat <- parLapply(cl, X = crime$Location, fun = function(x) {
  as.numeric(
    unlist(
      strsplit(substr(x, 2, nchar(x) - 1), 
               split= ", ")[[1]][1] 
    )
  )
})

## get lon
crime$lon <- parLapply(cl, X = crime$Location, fun = function(x) {
  as.numeric(
    unlist(
      strsplit(substr(x, 2, nchar(x) - 1), 
               split= ", ")[[1]][2] 
    )
  )
})

stopCluster(cl)
crime$lat <- unlist(crime$lat)
crime$lon <- unlist(crime$lon)
#############

## Filter out outlier longitude points
crime <- crime %>% 
  filter(crime$lon != 0)

## get contour lines that show regions by density
density.poly <- contourLines(
  kde2d(crime$lon, crime$lat,
        lims = c(
          expand_range(range(crime$lon), add = 0.5),
          expand_range(range(crime$lat), add = 0.5)
        ))
)

## create column that subsequent loop will modify
crime$density <- 0

## from highest to lowest density levels (which is default)
## get polygons, assign density val to points within them
for (i in 1:length(density.poly)) {
  hold.data <- point.in.polygon(crime$lon, crime$lat, 
                                density.poly[[i]]$x, density.poly[[i]]$y)
  crime$density[which(hold.data==1)] <- density.poly[[i]]$level
}

## create year column for animation iteration
crime <- crime %>% 
  mutate(Date.Occurred = as.Date(Date.Occurred, format = "%m/%d/%Y")) %>% 
  mutate(year = format(Date.Occurred, "%Y")) %>% 
  mutate(year = as.integer(year))

## Get Google Map Object from API
ggmap::register_google(key = "AIzaSyBgYVpL1in38KcEnznCFwmRKz2d1VJKSe4")
ggmap_show_api_key()
la.map <-   ggmap(get_googlemap("Los Angeles, California"))

#### point with color test ####
crime.density <- la.map +
  geom_point(crime, mapping = aes(x = lon, y = lat, 
                                  alpha = 0.01, 
                                  color = density,
                                  frame = year), size = 0.005, shape = ".") +
  scale_color_gradient2(low = "white", 
                       mid = "lightblue", 
                       high = "blue",
                       midpoint = mean(crime$density, na.rm = T)) +
  theme(
    legend.position='none'
  )


animation::ani.options(interval = 2)

crime.density.anim <- crime.density +
  transition_states(crime$year,
                    transition_length = 10,
                    state_length = 50) +
  labs(title = "Year: {closest_state}")
                  
animate(crime.density.anim, nframes = 50)

Los Angeles Crime Report Density Comparison By Gender

Maps

Female Crime Reports, By Relative Density
Male Crime Reports, By Relative Density

Code

The software I was using on macOS, RMarkdown, does not always recognize path variables when specified in bash_profile. To stitch the images I produced together into gif’s, I had to use ImageMagick from my computer’s Terminal application. Otherwise, everything else was done with this R code.

#### LIBRARIES ####

## DATA MANAGEMENT
library(tidyverse)
library(magrittr)
library(tictoc)
library(MASS)

## MAPS
library(sf)
library(rgdal)
# devtools::install_github("dkahle/ggmap", ref = "tidyup", force=TRUE)
library("ggmap")
library(sp)
library(scales)

#Set your API Key
ggmap::register_google(key = "AIzaSyBgYVpL1in38KcEnznCFwmRKz2d1VJKSe4")
# library(ggiraph)
# library(widgetframe)

## VISUALIZATION
# devtools::install_github('cran/ggplot2') 
library(ggplot2)
library(gganimate)
library(gifski)
## install.packages("/Users/charliecarter/Downloads/gifski_0.8.6.tar", repos = NULL,type = "binary")


## trying alternative density measure
## https://slowkow.com/notes/ggplot2-color-by-density/

# Get density of points in 2 dimensions.
# @param x A numeric vector.
# @param y A numeric vector.
# @param n Create a square n by n grid to compute density.
# @return The density within each square.
get_density <- function(x, y, ...) {
  dens <- MASS::kde2d(x, y, ...)
  ix <- findInterval(x, dens$x)
  iy <- findInterval(y, dens$y)
  ii <- cbind(ix, iy)
  return(dens$z[ii])
}

## Create column with year data
crime <- crime %>% 
  mutate(Date.Occurred = as.Date(Date.Occurred, format = "%m/%d/%Y")) %>% 
  mutate(year = format(Date.Occurred, "%Y")) %>% 
  mutate(year = as.factor(year))

## Create female subset
  fem.crime <- crime %>% 
    filter(Victim.Sex == "F")

## Create male subset
  male.crime <- crime %>% 
    filter(Victim.Sex == "M")

## get density of male and female crime reports in subsets, by year
  for(year in levels(fem.crime$year)) {
    fem.crime$dens_f[fem.crime$year == year] <- 
      get_density(fem.crime$lon[fem.crime$year == year], 
                  fem.crime$lat[fem.crime$year == year], 
                  n= 300)
  }
    for(year in levels(male.crime$year)) {
    male.crime$dens_m[male.crime$year == year] <- 
      get_density(male.crime$lon[male.crime$year == year], 
                  male.crime$lat[male.crime$year == year], 
                  n= 300)
  }


###########

# ggmap::register_google(key = "AIzaSyBgYVpL1in38KcEnznCFwmRKz2d1VJKSe4")
# ggmap_show_api_key()
# la.map <-   ggmap(get_googlemap("Los Angeles, California"))
setwd("/Users/charliecarter/Documents/Code Resume/Data")
saveRDS(la.map, file = "LA_map")

### FEMALE ####

  if(!dir.exists("/Users/charliecarter/examples")) {
    dir.create("/Users/charliecarter/examples")
  } 
  if(!dir.exists("/Users/charliecarter/examples/fem_crime")) {
    dir.create("/Users/charliecarter/examples/fem_crime")
  }
  setwd("/Users/charliecarter/examples/fem_crime")

# iterate over years and make map png's 
png(file="femcrime%02d.png", width=400, height=400)
for (year in levels(fem.crime$year)){
  n <- nrow(fem.crime[fem.crime$year == year,])
  print(paste(year, ": ", n))
  #### point with color test ####
  crime.density <- la.map +
    geom_point(fem.crime[fem.crime$year == year,1:ncol(fem.crime)], 
               mapping = aes(x = lon, 
                             y = lat,
                             alpha = 0.01,
                             color = dens_f), 
               size = 0.005, 
               shape = ".") +
    scale_color_viridis_c(option = "A", alpha = 0.5) + 
    labs(title = paste("Los Angeles Crime Reports With Female Victims,\nBy Density\nYear: ",year,"\nNumber of Reports: ", n)) +
    theme(
      legend.position='none'
    )
  plot(crime.density)
}
dev.off()


### MALE ####

  if(!dir.exists("/Users/charliecarter/examples")) {
    dir.create("/Users/charliecarter/examples")
  } 
  if(!dir.exists("/Users/charliecarter/examples/male_crime")) {
    dir.create("/Users/charliecarter/examples/male_crime")
  }
  setwd("/Users/charliecarter/examples/male_crime")

# iterate over years and make map png's 
png(file="malecrime%02d.png", width=400, height=400)
for (year in levels(male.crime$year)) {
  n <- nrow(male.crime[male.crime$year == year,])
  print(paste(year, ": ", n))
  #### point with color test ####
  crime.density <- la.map +
    geom_point(male.crime[male.crime$year == year, 1:ncol(male.crime)], 
               mapping = aes(x = lon, 
                             y = lat,
                             alpha = 0.01,
                             color = dens_m), 
               size = 0.005, 
               shape = ".") +
    scale_color_viridis_c(option = "A", alpha = 0.5) + 
    labs(title = paste("Los Angeles Crime Reports With Male Victims,\nBy Density\nYear: ",year,"\nNumber of Reports: ", n)) +
    theme(
      legend.position='none'
    )
  plot(crime.density)
}
dev.off()

These two gif’s are synchronized using JavaScript code.

<script type="text/javascript">
$(window).load(function() {
    $('.preload').attr('src', function(i,a){
        $(this).attr('src','').removeClass('preload').attr('src',a);
    });
});
</script>

British Reported Interest in Politics By Gender

Graph

Code

# Data from https://www.understandingsociety.ac.uk

# Read in and join data from 7 different files
library(tidyverse)
library(data.table)
# data.table is faster compared to readr so we'll use it in this case (the function fread()). You need to install this package first to be able to run this code.
# create a vector with the file names and paths
files <- dir(
             # Select the folder where the files are stored.
             "/Users/charliecarter/Documents/EXETER/MODULES TERM 4/Data Analysis III/priv_data3_2019/data/UKDA-6614-tab/tab",
             # Tell R which pattern you want present in the files it will display.
             pattern = "indresp",
             # We want this process to repeat through the entire folder.
             recursive = TRUE,
             # And finally want R to show us the entire file path, rather than just
             # the names of the individual files.
             full.names = TRUE)
# Select only files from the UKHLS.
files <- files[stringr::str_detect(files, "ukhls")]
# files
# create a vector of variable names
vars <- c("memorig", "sex_dv", "age_dv", "vote6")
for (i in 1:7) {
        # Create a vector of the variables with the correct prefix.
        varsToSelect <- paste(letters[i], vars, sep = "_")
        # Add pidp to this vector (no prefix for pidp)
        varsToSelect <- c("pidp", varsToSelect)
        # Now read the data. 
        data <- fread(files[i], select = varsToSelect)
        if (i == 1) {
                all7 <- data  
        }
        else {
                all7 <- full_join(all7, data, by = "pidp")
        }
        # Now we can remove data to free up the memory.
        rm(data)
} 


## create long version from wide data
Long <- all7 %>%
  ## gather data into the "very long" format
  gather(a_memorig:g_vote6, key = "variable", value = "value") %>% 
  ## split the column with variable names into two (one for wave and one for generic variable name)
  separate(variable, into = c("wave", "variable"), sep = "_", extra = "merge") %>% 
  ## convert into the format we need 
  spread(key = variable, value = value)

# Modify dataset
Long <- Long %>%
  # Only use data from Understanding Society Waves
  filter(memorig == 1) %>%
  # Recode sex variable to "Male" or "Female" character string
  mutate(sex_dv = ifelse(sex_dv == 2, "Female",
                         ifelse(sex_dv == 1, "Male",
                                NA_character_))) %>%
  # recode vote 6 to logical binary
  mutate(vote6 = case_when(vote6 < 0 ~ NA_integer_, TRUE ~ vote6))
  
# create histogram plot of political interest levels
hist.interest <- Long %>% 
  # filter out NA's 
  filter(!is.na(sex_dv)) %>% 
  ggplot() + 
  # create histogram
  geom_histogram(aes(x = vote6, fill=sex_dv)) +
  # facet by sex
  facet_grid(cols = vars(sex_dv)) +
  # adjust graph labels
  labs(title = "Comparison of Mean Political Interest, by Gender",
       fill = "Respondent Gender",
       subtitle = "From Understanding Society: The UK Household Longitudinal Survey") +
  # create x axis scale
  scale_x_reverse(labels = c("Very", "Fairly", "Not Very", "Not at all"),
                  name= "How interested would you say you are in politics?") +
  # increase panel spacing
  theme(panel.spacing = unit(2, "lines"))